#coding=utf-8

from bs4 import BeautifulSoup
import urllib
import urllib2
import sys

def getCurl(url, data=None, headers=None):
    """Perform an HTTP GET request and return the response body.

    :param url: target URL (query string is appended when *data* is given)
    :param data: optional dict of query parameters, urlencoded onto the URL
    :param headers: optional dict of HTTP request headers
    :return: raw response body as a byte string
    """
    # Avoid mutable default arguments; keep the old call signature working.
    if headers is None:
        headers = {}
    # BUG FIX: the original tested `if not data:`, so a populated dict was
    # ignored and an empty one produced a dangling "url?". Append the query
    # string only when there is actually data to send.
    if data:
        url = url + '?' + urllib.urlencode(data)
    # BUG FIX: pass None (not '') as the body — urllib2 treats ANY non-None
    # data as a POST body; None keeps this a true GET.
    request = urllib2.Request(url, None, headers)
    response = urllib2.urlopen(request)

    return response.read()


def postCurl(url, data = {}, headers = {}):
        """Perform an HTTP POST request and return the response body.

        :param url: target URL
        :param data: dict of form fields, urlencoded into the POST body
        :param headers: dict of HTTP request headers
        :return: raw response body as a byte string
        """
        # urlencode the form fields; a non-None body makes urllib2 issue a POST.
        body = urllib.urlencode(data)
        return urllib2.urlopen(urllib2.Request(url, body, headers)).read()

# Browser-like request headers so the blog server does not reject the scrape.
# NOTE(review): the Referer value is missing "//" ('http:www.baidu.com') —
# harmless here, but worth confirming it was intentional.
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0','Referer' : 'http:www.baidu.com'}
# url = 'http://blog.csdn.net/m0sh1'
# url = 'http://blog.csdn.net/m0sh1/article/details/45916923'
url = 'http://blog.csdn.net/m0sh1/article/details/40208391'
# Fetch the article page and parse it with the lxml backend.
html_doc = getCurl(url,{},headers)
soup = BeautifulSoup(html_doc,"lxml")
# Print the full HTML content
# print(soup.prettify())
print(soup.title)

# The CSDN article title lives in <span class="link_title">.
print(soup.find("span","link_title"))

print(soup.find("span","link_title").contents)
print('=========')
# title = soup.find("span","link_title").contents[0].contents[2]
# First child of the span's first child — presumably the raw title text
# surrounded by whitespace (hence the .strip() below); verify against the
# page's actual markup.
title = soup.find("span","link_title").contents[0].contents[0]
print(len(title))
print(title)
print('=========')
print(title.strip())



# Alternative lookup by CSS class attribute — same span as above.
print(soup.find(attrs={"class": "link_title"}))

# print(soup.find("font","red"))


# Everything below this exit is exploratory/dead code kept for reference.
sys.exit()

print(soup.title)
print(soup.title.name)
print(soup.title.string)
print(soup.ins)
print(soup.ins['data-revive-zoneid'])
print(soup.find_all('meta'))
print(soup.find(id="papelist"))

# soup2 = BeautifulSoup('<div class="pagelist" id="papelist"><span> 85条  共9页</span><strong>1</strong> <a href="/m0sh1/article/list/2">2</a> <a href="/m0sh1/article/list/3">3</a> <a href="/m0sh1/article/list/4">4</a> <a href="/m0sh1/article/list/5">5</a> <a href="/m0sh1/article/list/6">...</a> <a href="/m0sh1/article/list/2">下一页</a> <a href="/m0sh1/article/list/9">尾页</a></div>')
# print soup2.prettify()

print(soup.find(id="papelist").find("strong"))
print(soup.find(id="papelist").find_all("a"))

